//
//  adaboost.cpp
//  FastPR
//
//  Created by fanwenjie on 2017/2/4.
//  Copyright © 2017年 fanwenjie. All rights reserved.
//

#include "adaboost.h"
#include "canny.h"
//#include "imageproc.h"
#include <stdlib.h>
#include <math.h>



#define MIN_NEIGHBORS 3
#define GROUP_EPS 0.2
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) ((ptr)[p0] + (ptr)[p3] - (ptr)[p1] - (ptr)[p2])
#define THREAD_NUM 4
#define MAX_BUFFER_SIZE 1024

typedef _Atomic(unsigned int)   atomic_uint;

static void Image32Resize(const unsigned char *srcImage, const short srcWidth,const short srcHeight,const short desWidth,const short desHeight,unsigned int *desImage)
{
    int iSrc1[desWidth],iSrc2[desWidth];
    int iW1[desWidth],iW2[desWidth];
    const unsigned int area = ((unsigned int)desHeight) * desWidth;
    for(int i = 0;i < desWidth; ++i)
    {
        int value = i * srcWidth;
        iSrc1[i] = value / desWidth;
        iW2[i] = value % desWidth;
        iW1[i] = desWidth - iW2[i];
        iSrc2[i] = iSrc1[i] + (iW2[i] != 0);
    }
    
    for(int j = 0;j < desHeight; ++j)
    {
        int value = j * srcHeight;
        int jSrc1 = value / desHeight;
        int jw2 = value % desHeight;
        const unsigned char *srcj1 = srcImage + srcWidth * jSrc1;
        unsigned int *desj1 = desImage + j*desWidth;
        if(jw2){
            int jw1 = desHeight - jw2;
            const unsigned char *srcj2 = srcj1 + srcWidth;
            for(int i = 0;i < desWidth; ++i)
            {
                int iw1 = iW1[i], iw2 = iW2[i];
                int i1 = iSrc1[i],i2 = iSrc2[i];
                desj1[i] = (jw1 * (srcj1[i1] * iw1 + srcj1[i2] * iw2) + jw2 * (srcj2[i1] * iw1 + srcj2[i2] * iw2)) / area;
            }
        }else {
            for(int i = 0;i < desWidth; ++i)
            {
                int iw1 = iW1[i], iw2 = iW2[i];
                int i1 = iSrc1[i],i2 = iSrc2[i];
                desj1[i] = (srcj1[i1] * iw1 + srcj1[i2] * iw2) / desWidth;
            }
        }
    }
}

static void ImageIntegral(const unsigned int *origImage,const short width,const short height,unsigned int *integralImage)
{
    for(int j = 0; j < height * width; j += width)
    {
        const unsigned int *pSrc = origImage + j;
        unsigned int *pDes = integralImage + j;
        *pDes = *pSrc;
        for(int i = 1;i < width; ++i)
            pDes[i] = pSrc[i] + pDes[i - 1];
    }
    for(int j = width; j < height * width; j += width)
    {
        unsigned int *pDes = integralImage + j;
        unsigned int *pSrc = pDes - width;
        for(int i = 0;i < width; ++i)
            *pDes++ += *pSrc++;
    }
}

static unsigned int RectanglesCluster(Rect rects[],unsigned int rectNum, unsigned int labels[],double eps)
{
    const static int PARENT = 0 ,RANK = 1;
    unsigned int classNum = 0;
    int nodes[rectNum][2];
    // The first O(N) pass: create N single-vertex trees
    for(int i = 0; i < rectNum; ++i)
    {
        nodes[i][PARENT]= -1;
        nodes[i][RANK] = 0;
    }
    // The main O(N^2) pass: merge connected components
    for(int i = 0; i < rectNum; ++i)
    {
        int iRoot = i;
        
        // find root
        while( nodes[iRoot][PARENT] >= 0 )
            iRoot = nodes[iRoot][PARENT];
        int j;
        for( j = 0; j < rectNum; ++j)
        {
            const Rect *pRect1 = rects + i, *pRect2 = rects + j;
            double delta = pRect1->width < pRect2->width ? pRect1->width : pRect2->width;
            delta += pRect1->height <pRect2->height ? pRect1->height : pRect2->height;
            delta *= eps*0.5;
            if( i == j ||
               abs(((int)pRect1->x) - pRect2->x) > delta ||
               abs(((int)pRect1->y) - pRect2->y) > delta ||
               abs(((int)pRect1->x) + pRect1->width - pRect2->x - pRect2->width) > delta ||
               abs(((int)pRect1->y) + pRect1->height - pRect2->y - pRect2->height) > delta)
                continue;
            int jRoot = j;
            
            while( nodes[jRoot][PARENT] >= 0 )
                jRoot = nodes[jRoot][PARENT];
            
            if( jRoot != iRoot )
            {
                // unite both trees
                int rank = nodes[iRoot][RANK], rank2 = nodes[jRoot][RANK];
                if( rank > rank2 )
                    nodes[jRoot][PARENT] = iRoot;
                else
                {
                    nodes[iRoot][PARENT] = jRoot;
                    nodes[jRoot][RANK] += rank == rank2;
                    iRoot = jRoot;
                }
                
                int k = j, parent;
                
                // compress the path from node2 to root
                while( (parent = nodes[k][PARENT]) >= 0 )
                {
                    nodes[k][PARENT] = iRoot;
                    k = parent;
                }
                
                // compress the path from node to root
                k = i;
                while( (parent = nodes[k][PARENT]) >= 0 )
                {
                    nodes[k][PARENT] = iRoot;
                    k = parent;
                }
            }
        }
    }
    
    // Final O(N) pass: enumerate classes
    
    for(int i = 0; i < rectNum; ++i)
    {
        int root = i;
        while( nodes[root][PARENT] >= 0 )
            root = nodes[root][PARENT];
        // re-use the rank as the class label
        if( nodes[root][RANK] >= 0 )
            nodes[root][RANK] = ~classNum++;
        labels[i] = ~nodes[root][RANK];
    }
    return classNum;
}

static unsigned int RectanglesFilter(Rect rects[],const unsigned int rectNum, int groupThreshold, double eps)
{
    if( groupThreshold <= 0 || rectNum == 0)
        return 0;
    unsigned int labels[rectNum];
    unsigned int newRectNum = 0;
    unsigned int classNum = RectanglesCluster(rects,rectNum, labels, eps);
    //Rect rrects[classNum];
    unsigned int rrects[classNum][4];
    unsigned int rweights[classNum];
    for(int i = 0; i < classNum; ++i)
    {
        rrects[i][0] = rrects[i][1] = rrects[i][2] = rrects[i][3] = 0;
        rweights[i] = 0;
    }
    for(int i = 0; i < rectNum; ++i )
    {
        unsigned int cls = labels[i];
        if(!rrects[cls][0] || rrects[cls][0] > rects[i].x)
        {
            rrects[cls][0] = rects[i].x;
        }
        if(!rrects[cls][1] || rrects[cls][1] > rects[i].y)
        {
            rrects[cls][1] = rects[i].y;
        }
        if(rrects[cls][2] < rects[i].x + rects[i].width)
        {
            rrects[cls][2] = rects[i].x + rects[i].width;
        }
        if(rrects[cls][3] < rects[i].y + rects[i].height)
        {
            rrects[cls][3] = rects[i].y + rects[i].height;
        }
        rweights[cls]++;
    }
//    for(int i = 0; i < rectNum; ++i )
//    {
//        unsigned int cls = labels[i];
//        rrects[cls][0] += rects[i].x;
//        rrects[cls][1] += rects[i].y;
//        rrects[cls][2] += rects[i].x + rects[i].width;
//        rrects[cls][3] += rects[i].y + rects[i].height;
//        rweights[cls]++;
//    }
//    for(int i = 0;i < classNum; ++i )
//    {
//        unsigned int wg = rweights[i];
//        rrects[i][0] /= wg;
//        rrects[i][1] /= wg;
//        rrects[i][2] /= wg;
//        rrects[i][3] /= wg;
//    }
    
    for(int i = 0; i < classNum; ++i)
    {
        unsigned int *r1 = rrects[i];
        unsigned int n1 = rweights[i];
        if( n1 <= groupThreshold )
            continue;
        unsigned int j;
        for( j = 0; j < classNum; ++j)
        {
            int n2 = rweights[j];
            
            if( j == i || n2 <= groupThreshold )
                continue;
            unsigned int *r2 = rrects[j];
            
            int dx = (int)( (r2[2] - r2[0]) * eps );
            int dy = (int)( (r2[3] - r2[1]) * eps );
            
            if( i != j &&
               r1[0] >= r2[0] - dx &&
               r1[1] >= r2[1] - dy &&
               r1[2] <= r2[2] + dx &&
               r1[3] <= r2[3] + dy &&
               ((n2 > groupThreshold && n2 > n1) || n1 < groupThreshold) )
                break;
        }
        
        if( j == classNum )
        {
            Rect *rect = rects + newRectNum ++;
            rect->x = r1[0];
            rect->y = r1[1];
            rect->width = r1[2] - r1[0];
            rect->height = r1[3] - r1[1];
        }
    }
    return newRectNum;
}

static void FeaturesOffsets(const unsigned int (*origFeatures)[4],const unsigned int Num,const short rowLen, unsigned int (*offsets)[16])
{
    for(int i = 0;i < Num; ++i)
    {
        const unsigned int x = origFeatures[i][0];
        const unsigned int y = origFeatures[i][1];
        const unsigned int width = origFeatures[i][2];
        const unsigned int height = origFeatures[i][3];
        int index = 0;
        for(int j = 0;j <= 3; ++j)
            for(int k = 0;k <= 3; ++k)
                offsets[i][index++] = (y + j * height) * rowLen + x + k * width;
    }
}

static unsigned char LBPCalc(const unsigned int *position,const unsigned int offsets[16])
{
    int cval = CALC_SUM_OFS_( offsets[5], offsets[6], offsets[9], offsets[10], position);    
    return ((CALC_SUM_OFS_( offsets[0], offsets[1], offsets[4], offsets[5], position ) >= cval) << 7) | // 0
    ((CALC_SUM_OFS_( offsets[1], offsets[2], offsets[5], offsets[6], position ) >= cval) << 6 ) |       // 1
    ((CALC_SUM_OFS_( offsets[2], offsets[3], offsets[6], offsets[7], position ) >= cval) << 5)  |       // 2
    ((CALC_SUM_OFS_( offsets[6], offsets[7], offsets[10], offsets[11], position ) >= cval) << 4)  |     // 5
    ((CALC_SUM_OFS_( offsets[10], offsets[11], offsets[14], offsets[15], position ) >= cval) << 3)|     // 8
    ((CALC_SUM_OFS_( offsets[9], offsets[10], offsets[13], offsets[14], position ) >= cval) << 2)|      // 7
    ((CALC_SUM_OFS_( offsets[8], offsets[9], offsets[12], offsets[13], position ) >= cval) << 1) |      // 6
    (CALC_SUM_OFS_( offsets[4], offsets[5], offsets[8], offsets[9], position ) >= cval);                // 3
}

static inline int IsStageWillIncrese(const Classifier *pClassifier)
{
    return pClassifier->stageThreshold != INT_MIN;
}

static int CascadeClassify(const unsigned int *position,const unsigned int (*offsets)[16])
{
    const unsigned int classifierNum = sizeof(classifiers) / sizeof(*classifiers);
    const Classifier *pClassifier = classifiers;
    int weight = 0, stages = 1;
    for(unsigned int i = 0; i < classifierNum; ++i)
    {
        unsigned char lbpValue = LBPCalc(position, (*offsets++));
        weight += pClassifier->weight[!(pClassifier->isReject[lbpValue>>5] & (1 << (lbpValue & 31)))];
        if(IsStageWillIncrese(pClassifier))
        {
            if(weight < pClassifier->stageThreshold)
                return stages;
            weight = 0;
            ++stages;
        }
        ++pClassifier;
    }
    return 0;
}

static void ObjectDetectOnScale(const unsigned char *grayImage,const short width,const short height,const float scale, Rect rects[],volatile atomic_uint *pRectNum,const unsigned int step)
{
    const short imageWidth = width / scale,imageHeight = height / scale;
    const unsigned int imageArea = ((unsigned int)imageWidth) * imageHeight;
    unsigned int offsets[sizeof(features) / sizeof(*features)][16] = {0};
    FeaturesOffsets(features, sizeof(features) / sizeof(*features), imageWidth, offsets);
    unsigned int *integralImage = 0;
    while (!integralImage) {
        integralImage = (unsigned int *)malloc(imageArea * sizeof(unsigned int));
    }
    Image32Resize(grayImage, width, height, imageWidth, imageHeight, integralImage);
    ImageIntegral(integralImage, imageWidth, imageHeight, integralImage);
    for(int i = 0; i < imageHeight - windowHeight; i += step)
        for(int j = 0;j < imageWidth - windowWidth; j += step)
        {
            unsigned int *p = integralImage + i * imageWidth + j;
            int result = CascadeClassify(p, offsets);
            if(!result)
            {
                unsigned int index = (*pRectNum)++;
                if(index < MAX_BUFFER_SIZE)
                {
                    Rect *pRect = rects + index;
                    pRect->x = j * scale;
                    pRect->y = i * scale;
                    pRect->width = windowWidth * scale;
                    pRect->height = windowHeight * scale;
                }
            }else if(result == 1)
                j += step;
        }
    free(integralImage);
}

Rect ObjectDetect(const unsigned char *grayImage, const short width,const short height)
{
    const static float scaleFactor = 1.0625f;
    float scales[MAX_BUFFER_SIZE] = { 0 };
    Rect rects[MAX_BUFFER_SIZE] = { 0 };
    unsigned int step[MAX_BUFFER_SIZE] = { 0 };
    volatile atomic_uint rectNum = 0, index = 0;
    int scaleNum = 0;
    for(float scale = 1.f;windowWidth * scale < width && windowHeight * scale < height && scaleNum < sizeof(scales) / sizeof(*scales);scale *= scaleFactor)
    {
        scales[scaleNum] = scale;
        step[scaleNum++] = 1 + (scale < 2);
    }
#pragma omp parallel num_threads(THREAD_NUM)
    while (1)
    {
        const unsigned int i = index++;
        if(i >= scaleNum) break;
        ObjectDetectOnScale(grayImage, width, height, scales[i], rects, &rectNum, step[i]);
    }
    rectNum = RectanglesFilter(rects, rectNum, MIN_NEIGHBORS, GROUP_EPS);
    if(rectNum == 0)
        return (Rect){0};
    Rect *pResult = rects;
    if(rectNum > 1)
    {
        for(int i = 1; i < rectNum; ++i)
            if(rects[i].y > pResult->y)
                pResult = rects + i;
    }
    return *pResult;
}








